import re

import requests
from bs4 import BeautifulSoup4
import bs4


def getHTML(origin_url):
    """Return the URL behind the first link on a page whose text is '黔'.

    The page at ``origin_url`` is downloaded and parsed, and the first
    ``<a>`` element whose string is exactly '黔' is looked up (the site's
    province index uses that text for the entry this script is after).

    Args:
        origin_url: URL of the page containing the province links.

    Returns:
        The link target, resolved against ``origin_url`` so that relative
        hrefs can be fetched directly. Returns "" when ``origin_url`` is
        empty, the download fails, or no matching link with an href exists.
    """
    # Imported here because the file's import block does not provide it.
    from urllib.parse import urljoin

    if not origin_url:
        return ""
    try:
        r = requests.get(origin_url, timeout=30)  # fetch the page
        r.raise_for_status()  # raises on 4xx/5xx responses
        r.encoding = r.apparent_encoding  # decode using the detected charset
    except requests.RequestException:
        # Best effort: any network/HTTP problem yields an empty result.
        return ""
    soup = bs4.BeautifulSoup(r.text, "html.parser")
    anchor = soup.find('a', string='黔')
    href = anchor.get('href') if anchor is not None else None
    if not href:
        # Same "nothing found" value as the failure path above, so callers
        # only have to deal with one kind of empty result.
        return ""
    # Absolute hrefs pass through unchanged; relative ones become fetchable.
    return urljoin(origin_url, href)


def getHTMLText(url):
    """Download ``url`` and return the response body as text.

    Args:
        url: Address of the page to fetch.

    Returns:
        The decoded page text, or "" when ``url`` is empty or the request
        fails (connection error, timeout, invalid URL, or a 4xx/5xx status).
    """
    if not url:
        # Nothing to fetch; skip the round trip through requests.
        return ""
    try:
        r = requests.get(url, timeout=30)  # fetch the page
        r.raise_for_status()  # raises on 4xx/5xx responses
        r.encoding = r.apparent_encoding  # decode using the detected charset
        return r.text
    except requests.RequestException:
        # Only request-related failures are treated as "no content";
        # KeyboardInterrupt and programming errors still propagate.
        return ""


def printList(html):
    """Print the text of every ``<td class="FONT">`` cell found in ``html``.

    Each cell is printed under a heading taken, in order, from a fixed list
    of four titles, followed by a dashed separator line. Cells beyond the
    fourth are still printed, with an empty heading, instead of raising
    ``IndexError``.

    Args:
        html: HTML source of the ranking page. Empty or ``None`` input
            prints nothing.

    Returns:
        None. Output goes to stdout.
    """
    if not html:
        return
    soup = bs4.BeautifulSoup(html, "html.parser")
    tables = ['贵州本科高校排名信息', '贵州专科高校排名信息', '贵州独立学院排名信息', '贵州民办高校排名信息']
    for i, x in enumerate(soup.find_all('td', class_='FONT')):
        # The page may contain more matching cells than known titles.
        title = tables[i] if i < len(tables) else ""
        print("\n\n\t\t\t\t\t\t\t{index}".format(index=title))
        bd = re.sub(r"<[^>]*>", "", str(x))  # strip all tags, keep the text
        print(bd)
        print("----------------------------------------------------------------------------------------------------")


def main():
    """Locate the '黔' page from the site's home page and print its tables."""
    home_page = 'http://www.huaue.com/'
    province_page = getHTML(home_page)
    printList(getHTMLText(province_page))


if __name__ == "__main__":
    # Run the scrape only when executed as a script, so importing this
    # module does not trigger network requests.
    main()